############################################################################
# Replication Code for                                                     #
# "The Effects of Forced Versus Selective Exposure to Propaganda in China" #
# Political Science Research and Methods                                   #
# Author: Xiaoxiao SHEN                                                    #
# Date: July 6, 2025                                                       #
############################################################################



rm(list=ls())

# Install necessary packages if not already installed
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("broom")) install.packages("broom")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("plyr")) install.packages("plyr")
if (!require("dplyr")) install.packages("dplyr")
if (!require("fuzzyjoin")) install.packages("fuzzyjoin")
if (!require("stringr")) install.packages("stringr")
if (!require("knitr")) install.packages("knitr")
if (!require("gridExtra")) install.packages("gridExtra")
if (!require("estimatr")) install.packages("estimatr")
if (!require("Matrix")) install.packages("Matrix")
if (!require("mediation")) install.packages("mediation")
if (!require("GGally")) install.packages("GGally")
if (!require("stargazer")) install.packages("stargazer")
if (!require("irr")) install.packages("irr")
if (!require("GK2011")) install.packages("GK2011")
if (!require("psych")) install.packages("psych")
if (!require("nFactors")) install.packages("nFactors")



### Step1: Load packages ###

library(ggplot2)
library(broom)
library(tidyverse)
library(plyr)
library(dplyr)
library(fuzzyjoin)
library(stringr)
library(knitr)
library(gridExtra)
library(estimatr)
library(Matrix)
library(mediation)
library(GGally)
library(stargazer)
library(irr)
library(GK2011)
library(psych)
library(nFactors)



# Set data directory --- Please replace "XXX" with the appropriate path
data_dir <- "/XXX/Replication_Xiaoxiao Shen_PSRM/Data" #
output_dir <- "/XXX/Replication_Xiaoxiao Shen_PSRM/Output/Survey3"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}



### Step2: Load data ###
header <- read.csv(file.path(data_dir, "Survey3.csv"), header = FALSE, nrows = 1, as.is = TRUE)
data <- read.csv(file.path(data_dir, "Survey3.csv"), header= FALSE , na=c("", "NA")) # 1396 obs. of 121 variables 
colnames(data) = header
rm(header) 

data <- data%>%
        filter(data$"Finished"== 1 ) # 988 obs. of 121 variables 
data <- data%>%
        filter(data$"consent"== 1 ) # 976 obs. of 121 variables
data <- data%>%
        filter(data$"summary_check_RA1"!= 99 ) # 897 obs. of 121 variables
data <- data%>%
        filter(data$"summary_check_RA2"!= 99 ) # 897 obs. of 121 variables
# data <- data%>%
#         filter(data$"thought_check_RA1"!= 99 )
# data <- data%>%
#         filter(data$"thought_check_RA2"!= 99 )



### Step3: Data Cleaning ###

# 3.1 Assign treatment
data$treatment=0

treatment1=which(data$group=="1_forced_propaganda")
data$treatment[treatment1]=1 # force_polit

treatment2=which(data$group=="2_forced_nonpropaganda")
data$treatment[treatment2]=2 # force_science

treatment3=which(data$select==1)
data$treatment[treatment3]=3 # select_polit

treatment4=which(data$select==2)
data$treatment[treatment4]=4 # select_science

# drop=which(data$treatment==0)
# data=data[-drop,] # some samples do not have a clear type

data$treatment=factor(data$treatment,
                    labels=c("force_polit","force_science","select_polit","select_science")) # 897 obs. of 122 variables

table(data$treatment) # force_polit:210, force_science:214, select_polit:401, select_science:72

rm(treatment1);rm(treatment2);rm(treatment3);rm(treatment4)


# 3.2 Recode material (1 = read before)
data$material[data$material==2] <- 0 # 897 obs. of 122 variables
data$material <- as.numeric(data$material)
table(data$material) # 0:668, 1:229


# 3.3 Recode gender (1 = female)
data$gender <- ifelse(data$gender==2,1,0) # 897 obs. of 122 variables
data$gender <- as.numeric(data$gender)
table(data$gender) # 0:410, 1:487


# 3.4 Recode age (in years)
# (1) Delete obs under 18
# (2) 1 for obs 30 or younger, 0 for obs older than 30
data$age <- as.character(data$age)
class(data$age)
data$age <- 2020-as.numeric(data$age)
data$age[data$age>=100|data$age<=0] <- NA
data <- data%>%
        filter(data$"age">= 18 ) 
data$age <- ifelse(data$age <= 30,1,0) # 875 obs. of 122 variables
data$age <- as.numeric(data$age)
table(data$age) # 0: 152, 1: 723


# 3.5 Recode education
# 1 for college degree "7" or above, 0 for others
data$education[data$education==10] <- NA
data$education <- ifelse(data$education >= 7,1,0) # 875 obs. of 122 variables
data$education <- as.numeric(data$education)
table(data$education) # 0: 424， 1: 450


# 3.6 Recode urban hukou
# 0 for urban hukou, 0 for others
data$hukou[data$hukou==5] <- NA
data$urban <- ifelse(data$hukou==2|data$hukou==4,1,0) # 875 obs. of 122 variables
data$urban <- as.numeric(data$urban)
table(data$urban) # 0: 451, 1: 423


# 3.7 Recode CCP
# 1 for CCP member, 0 for others
data$party <- ifelse(data$party==1,1,0) # 875 obs. of 122 variables
data$party <- as.numeric(data$party)
table(data$party) # 0: 750, 1:125


# 3.8 Recode income
# 1 for high income, 0 for others
# Code 4,5,6 as high income
data$income <- ifelse(data$income==7|data$income==8|data$income==9,0,data$income)
data$income <- ifelse(data$income==4|data$income==5|data$income==6,1,0) # 875 obs. of 122 variables
data$income <- as.numeric(data$income) 
table(data$income) # 0:607, 1: 268


# 3.9 Recode knowledge
# Respondents' political knowledge is scored as follows: 
# 0 if both questions are answered incorrectly; 
# 0.5 if one is answered correctly; 
# and 1 if both are answered correctly.
data$political_knowledge1 <- ifelse(data$political_knowledge1==1,1,0)
table(data$political_knowledge1) # 0: 192, 1: 683
data$political_knowledge2 <- ifelse(data$political_knowledge2==4,1,0)
table(data$political_knowledge2) # 0: 435, 1: 440
data$knowledge <- (data$political_knowledge1+data$political_knowledge2)/2 # 875 obs. of 123 variables
data$knowledge <- as.numeric(data$knowledge) 
table(data$knowledge) # 0: 139， 0.5: 349， 1: 387


# 3.10 Recode marriage
# 0 = No (never married/divorced/other), 1 = Yes (married/widowed)
data$marriage <- ifelse(data$marriage==2|data$marriage==4, 1, 0) # 875 obs. of 123 variables
data$marriage <- as.numeric(data$marriage)
table(data$marriage) # 0:619, 1:256


# 3.11 Recode interst
data$interest <- data$political_interest
data$interest <- as.numeric(data$interest) # 875 obs. of 124 variables
table(data$interest)  # 1: 14, 2: 125, 3: 261, 4: 382, 5: 93


# 3.12 Recode news_consumption
data$news_consumption <- as.numeric(data$news_consumption) # 875 obs. of 124 variables
table(data$news_consumption) # 1:167, 2: 96, 3: 267, 4: 185, 5:160


# 3.13 Recode occupation 
# Government agency is coded as 1 
# (1: government department, 
# 2: state-owned public institution [science, education, culture, health], 
# 3: state-owned enterprise); 
# non-government agency is coded as 0.
data$occupation <- ifelse(data$occupation==1|data$occupation==2|data$occupation==3, 1, 0) # 875 obs. of 124 variables
data$occupation <- as.numeric(data$occupation)
table(data$occupation) # 0: 698, 1: 177


# 3.14 Recode major
# 1 for social science, 0 for non-social science
data$major <- ifelse(data$major==3, 1, 0) # 875 obs. of 124 variables
data$major <- as.numeric(data$major)
table(data$major) # 0: 734, 1:141


# 3.15 Recode reading_time
data$reading_time <- as.numeric(data$reading_time) # 875 obs. of 124 variables


# 3.16 Recode confidence_2
data$confidence_2 <- as.numeric(data$confidence_2) # 875 obs. of 124 variables
which(colnames(data) == "confidence_2")

data[,57]
# 1<->5
condition=data[,57]==1
data[condition,57]=6
condition=data[,57]==5
data[condition,57]=1
condition=data[,57]==6
data[condition,57]=5
# 2<->4
condition=data[,57]==2
data[condition,57]=6
condition=data[,57]==4
data[condition,57]=2
condition=data[,57]==6
data[condition,57]=4

rm(condition) 


# 3.17 Recode ethnicity
data$ethnicity <- ifelse(data$ethnicity==1, 1, 0) # 875 obs. of 124 variables
data$ethnicity <- as.numeric(data$ethnicity)
table(data$ethnicity) # 0: 58, 1: 817


# 3.18 summary_check_RA
# Use package irr to calculate intercoder reliability，method: Cohen's Kappa
table(data$summary_check_RA1)
table(data$summary_check_RA2)
data$summary_check_RA1 <- as.numeric(data$summary_check_RA1)
data$summary_check_RA2 <- as.numeric(data$summary_check_RA2)

kappa_result <- kappa2(data.frame(data$summary_check_RA1, data$summary_check_RA2))


# 3.19 Summary statistics
selected_vars <- c("gender", "age", "education", "urban", "party", "income", "marriage", "occupation", "major", "ethnicity")

one_proportions <- sapply(data[selected_vars], function(x) mean(x == 1, na.rm = TRUE))

summary_stat <- data.frame(
  variable = names(one_proportions),
  proportion_of_1 = round(one_proportions, 3),
  stringsAsFactors = FALSE
)

summary_stat <- rbind(
  summary_stat,
  data.frame(
    variable = "N",
    proportion_of_1 = nrow(data)
  )
)

write.csv(summary_stat, file = file.path(output_dir, "3_summary_stat.csv"), row.names = FALSE)



### Step4: Balance Plot ###

data$forced_prop <- data$group=="1_forced_propaganda" # 875 obs. of 125 variables
data$forced_nonprop <- data$group=="2_forced_nonpropaganda" # 875 obs. of 126 variables
data$select <- data$group=="3_select" 

forced_prop <- summary(lm(forced_prop~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))
forced_nonprop <- summary(lm(forced_nonprop~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))
select <- summary(lm(select~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))

forced_prop.1 <- as.data.frame(forced_prop$coefficients)
forced_prop.1$Group <- 'forced_prop'

forced_nonprop.1 <- as.data.frame(forced_nonprop$coefficients)
forced_nonprop.1$Group <- 'forced_nonprop'

select.1 <- as.data.frame(select$coefficients)
select.1$Group <- 'select'

all_4.rap = rbind.data.frame(forced_prop.1, forced_nonprop.1, select.1)
all_4.rap$Estimate = round(all_4.rap$Estimate, 3)
all_4.rap$`Std. Error` = round(all_4.rap$`Std. Error`, 3)
all_4.rap$`t value` = round(all_4.rap$`t value`, 3)
all_4.rap$`Pr(>|t|)` = round(all_4.rap$`Pr(>|t|)`, 3)
write.csv(all_4.rap, file = file.path(output_dir, "4_balance_check.csv"), row.names = TRUE)



### Step5: Manipulation check ###

data$class=0 # 875 obs. of 127 variables

class1=which(data$group=="1_forced_propaganda")
data$class[class1]=1 # forced

class1=which(data$group=="2_forced_nonpropaganda")
data$class[class1]=1 # forced

class2=which(data$group=="3_select")
data$class[class2]=2 # select

data$class=factor(data$class,
                labels=c("forced","select"))

table(data$class) # forced: 409, select: 466

### First, divide the observation into forced and selective, and then look at the proportion of "1" in the two RAs.

# # Calculate the proportion of 0 and 1 in summary_check_RA1 in the group with class "forced"
table_RA1_forced <- table(data$class == "forced", data$summary_check_RA1)
prop_RA1_forced <- prop.table(table_RA1_forced, 1)

# Calculate the proportion of 0 and 1 in summary_check_RA2 in the group with class "forced"
table_RA2_forced <- table(data$class == "forced", data$summary_check_RA2)
prop_RA2_forced <- prop.table(table_RA2_forced, 1)

# Results
prop_RA1_forced
prop_RA2_forced

# See whether there is a significant difference in the proportions of 0 and 1 in
# summary_check_RA1 and summary_check_RA2 among the people whose classes are forced and selected.

# Generate a contingency table of class and summary_check_RA1
table_RA1 <- table(data$class, data$summary_check_RA1)
table_RA2 <- table(data$class, data$summary_check_RA2)

# Compare whether there is a significant difference in the proportions of 0 and 1 in the forced and select groups
sink(file.path(output_dir, "5_accuracy_of_article_summaries.txt"))
print("The proportion of 0 and 1 in summary_check_RA1 in the group with class `forced`")
prop_RA1_forced
print("The proportion of 0 and 1 in summary_check_RA2 in the group with class `forced`")
prop_RA2_forced
chisq.test(table_RA1)
chisq.test(table_RA2)
sink()


# effort
data$effort = as.numeric(data$effort)
data$class = as.factor(data$class)

summary(aov(data$effort~class,data=data))
TukeyHSD(aov(data$effort~class,data=data))

effort.1=TukeyHSD(aov(data$effort~class,data=data))
effort.1=as.data.frame(effort.1$class)


# trivialization 
data$trivialization = as.numeric(data$trivialization)
data$class = as.factor(data$class)

summary(aov(data$trivialization~class,data=data)) 
TukeyHSD(aov(data$trivialization~class,data=data))

trivialization.1=TukeyHSD(aov(data$trivialization~class,data=data))
trivialization.1=as.data.frame(trivialization.1$class)

manipulation_check = rbind.data.frame(effort.1, trivialization.1)
manipulation_check$SE = (manipulation_check$upr - manipulation_check$lwr)/1.96/2
manipulation_check = round(manipulation_check, 3)
manipulation_check$Variable = c("Effort", "Trivialization")

write.csv(manipulation_check, file = file.path(output_dir, "5_manipulation_check.csv"), row.names = TRUE)



### Step6: Marginal effect ###

# 6.1 reverse function
reverse=function(x,old,new,store=-1){ 
        # the function reverses the original score to a new one
        # "store" is a temp store which keeps the dt, make sure "can" is not equal to any one of "old" and "new"
        # make sure old and new are the same length
        # "x" is a vector
        numb=length(old)
        for(i in 1:numb){
                x[which(x==old[i])] = store
                x[which(x==new[i])] = old[i]
                x[which(x==store)] = new[i]
        }
        return(x) # return a vector
}


# 6.2 rescale covariates
range_0_1=function(x,range=NULL){ 
        # if do not give range, make sure both best and worst answer has sample
        if(is.null(range)){
                min=min(x,na.rm = TRUE)
                max=max(x,na.rm = TRUE)
        }else{
                min=min(range)
                max=max(range)
        }
        if(min>0){
                print(min)
                warning("minimum value larger than 0, make sure it is correct")
        }
        result=(x-min)/(max-min)
        return(result)
}


# 6.3 select_polit=1, select_science=0
data$select_polit=NA # 875 obs. of 128 variables
data$select_polit[which(data$treatment=="select_polit")]=1
data$select_polit[which(data$treatment=="select_science")]=0 # no not need to choose extra variable, just use "select" is enough
table(data$select_polit) # 0: 71, 1: 395


# 6.4 prepare: political_ideology / political_interest / political_knowledge / news_consumption

# 6.4.1 political_ideology
std=function(x){
        return((x-mean(x))/sd(x))
}

data$political_ideology_1 = as.numeric(data$political_ideology_1)
data$political_ideology_2 = as.numeric(data$political_ideology_2)
data$political_ideology_3 = as.numeric(data$political_ideology_3)

data$ideology=data$political_ideology_1+data$political_ideology_2+data$political_ideology_3 # 875 obs. of 129 variables


# 6.4.2 political_interest
data$interest = as.numeric(data$interest)
table(data$interest) # 1:14, 2:125, 3:261, 4:382, 5:93

data$interest=range_0_1(data$interest)
table(data$interest) # 0:14, 0.25:125, 0.5:261, 0.75:382, 1:93 


# 6.4.3 political_knowledge
data$knowledge = as.numeric(data$knowledge)
table(data$knowledge) # 0:139, 0.5:349, 1:387

data$knowledge=range_0_1(data$knowledge)
table(data$knowledge) # 0:139, 0.5:349, 1:387


# 6.4.4 news_consumption
data$news_consumption <- as.numeric(data$news_consumption)
table(data$news_consumption) # 1:167, 2: 96, 3: 267, 4: 185, 5:160

data$news_consumption=range_0_1(data$news_consumption)
table(data$news_consumption) # 0:167, 0.25:96, 0.5:267, 0.75:185, 1:160


# 6.5 logistic regression

# 6.5.1 binomial links to logit regression
marginal.reg=glm(select_polit~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data,family="binomial")


# 6.5.2 summary 
marginal_reg_summary = as.data.frame(summary(marginal.reg)$coefficients)
marginal_reg_summary = round(marginal_reg_summary, 3)
write.csv(marginal_reg_summary, file = file.path(output_dir, "8_marginal_effects.csv"), row.names = TRUE)



### Step7: Main effects ###

# 7.1 rand
data$rand=0 # 875 obs. of 130 variables

rand1=which(data$group=="1_forced_propaganda")
data$rand[rand1]=1 # random

rand1=which(data$group=="2_forced_nonpropaganda")
data$rand[rand1]=1 # random

rand2=which(data$group=="3_select")
data$rand[rand2]=0 # select

table(data$rand) # 0: 466, 1: 409


# 7.2 tr
data$tr=0 # 875 obs. of 131 variables

tr1=which(data$treatment=="force_polit")
data$tr[tr1]=1 # treated

tr1=which(data$treatment=="select_polit")
data$tr[tr1]=1 # treated

tr2=which(data$treatment=="force_science")
data$tr[tr2]=0 # control

tr2=which(data$treatment=="select_science")
data$tr[tr2]=0 # control

table(data$tr) # 0: 277, 1: 598


# 7.3 gk.estimate
data$central_trust = as.numeric(data$central_trust)
data$local_trust = as.numeric(data$local_trust)
data$central_satisfaction = as.numeric(data$central_satisfaction)
data$local_satisfaction = as.numeric(data$local_satisfaction)
data$evaluation_now = as.numeric(data$evaluation_now)
data$evaluation_future = as.numeric(data$evaluation_future)
data$confidence_1 = as.numeric(data$confidence_1)
data$confidence_2 = as.numeric(data$confidence_2)
data$efficacy_1 = as.numeric(data$efficacy_1)
data$efficacy_2 = as.numeric(data$efficacy_2)
data$pride_1 = as.numeric(data$pride_1)
data$pride_2 = as.numeric(data$pride_2)

set.seed(12345)
gk.estimate=list()
gk.estimate[[1]]=estimate(rand = data$rand, tr = data$tr, y = data$central_trust)
gk.estimate[[2]]=estimate(rand = data$rand, tr = data$tr, y = data$local_trust)
gk.estimate[[3]]=estimate(rand = data$rand, tr = data$tr, y = data$central_satisfaction)
gk.estimate[[4]]=estimate(rand = data$rand, tr = data$tr, y = data$local_satisfaction)
gk.estimate[[5]]=estimate(rand = data$rand, tr = data$tr, y = data$evaluation_now)
gk.estimate[[6]]=estimate(rand = data$rand, tr = data$tr, y = data$evaluation_future)
gk.estimate[[7]]=estimate(rand = data$rand, tr = data$tr, y = data$confidence_1)
gk.estimate[[8]]=estimate(rand = data$rand, tr = data$tr, y = data$confidence_2)
gk.estimate[[9]]=estimate(rand = data$rand, tr = data$tr, y = data$efficacy_1)
gk.estimate[[10]]=estimate(rand = data$rand, tr = data$tr, y = data$efficacy_2)
gk.estimate[[11]]=estimate(rand = data$rand, tr = data$tr, y = data$pride_1)
gk.estimate[[12]]=estimate(rand = data$rand, tr = data$tr, y = data$pride_2)

list.name=c("central_trust","local_trust",
            "central_satisfaction","local_satisfaction",
            "evaluation_now","evaluation_future",
            "confidence_1","confidence_2",
            "efficacy_1","efficacy_2",
            "pride_1","pride_2")

names(gk.estimate)=list.name 

gk.estimate_central_trust = as.data.frame(gk.estimate$central_trust)
gk.estimate_local_trust = as.data.frame(gk.estimate$local_trust)
gk.estimate_central_satisfaction = as.data.frame(gk.estimate$central_satisfaction)
gk.estimate_local_satisfaction = as.data.frame(gk.estimate$local_satisfaction)
gk.estimate_evaluation_now = as.data.frame(gk.estimate$evaluation_now)
gk.estimate_evaluation_future = as.data.frame(gk.estimate$evaluation_future)

gk.estimate_confidence_1 = as.data.frame(gk.estimate$confidence_1)
gk.estimate_confidence_2 = as.data.frame(gk.estimate$confidence_2)
gk.estimate_efficacy_1 = as.data.frame(gk.estimate$efficacy_1)
gk.estimate_efficacy_2 = as.data.frame(gk.estimate$efficacy_2)
gk.estimate_pride_1 = as.data.frame(gk.estimate$pride_1)
gk.estimate_pride_2 = as.data.frame(gk.estimate$pride_2)

gk.estimate_central_trust['name'] = rep("central_trust", times=4)
gk.estimate_local_trust['name'] = rep("local_trust", times=4)
gk.estimate_central_satisfaction['name'] = rep("central_satisfaction", times=4)
gk.estimate_local_satisfaction['name'] = rep("local_satisfaction", times=4)
gk.estimate_evaluation_now['name'] = rep("evaluation_now", times=4)
gk.estimate_evaluation_future['name'] = rep("evaluation_future", times=4)

gk.estimate_confidence_1['name'] = rep("confidence_1", times=4)
gk.estimate_confidence_2['name'] = rep("confidence_2", times=4)
gk.estimate_efficacy_1['name'] = rep("efficacy_1", times=4)
gk.estimate_efficacy_2['name'] = rep("efficacy_2", times=4)
gk.estimate_pride_1['name'] = rep("pride_1", times=4)
gk.estimate_pride_2['name'] = rep("pride_2", times=4)

gk.estimate.rap=rbind.data.frame(gk.estimate_central_trust, gk.estimate_local_trust, 
                                 gk.estimate_central_satisfaction, gk.estimate_local_satisfaction, 
                                 gk.estimate_evaluation_now, gk.estimate_evaluation_future,
                                 gk.estimate_confidence_1, gk.estimate_confidence_2, 
                                 gk.estimate_efficacy_1, gk.estimate_efficacy_2,
                                 gk.estimate_pride_1, gk.estimate_pride_2)
# # 7.3 gk.estimate.rap
# gk.estimate.rap$Estimate = round(gk.estimate.rap$Estimate, 3)
# gk.estimate.rap$SE = round(gk.estimate.rap$SE, 3)
# gk.estimate.rap$t = round(gk.estimate.rap$t, 3)
# gk.estimate.rap$p = round(gk.estimate.rap$p, 3)

# 7.4 get.star
get.star=function(p){
        p_=abs(p)
        if(p_>1 | p_<0){stop("p_ value do not in (0,1) \n")}
        
        if(p_>=0.1){return("")}
        else if(p_>0.05){return("*")}
        else if(p_>0.01){return("**")}
        else{return("***")}
}


# 7.5 gk.summary
gk.summary=function(gk,name="variable",digit=3){
        dt=as.data.frame(matrix(nrow=2,ncol=4))
        names(dt)=c("name","ATE","selector","non_selector")
        
        for(i in 1:3){
                coef.value=round(gk$Estimate[i],digit)
                p_=gk$p[i]
                star=get.star(p_)
                dt[1,i+1]=paste(coef.value,star,sep="")
                se=paste("(",round(gk$SE[i],digit),")",sep="")
                dt[2,i+1]=se
        }
        
        dt[1,1]=name
        return(dt)
}


# 7.6 gk.gather
gk.gather=function(gk.list,digit=3,NA.insert=FALSE){
        n=length(gk.list) # if you have only one estimate, use gk.summary 
        list.name=names(gk.list)
        result=vector()
        for(i in 1:n){
                gk.table=gk.summary(gk.list[[i]],name=list.name[i],digit=digit)
                if(NA.insert){result=rbind.data.frame(result,NA,gk.table)}
                else{result=rbind.data.frame(result,gk.table)}
        }
        return(result)
}

gk1=gk.gather(gk.estimate,digit=3,NA.insert=FALSE)
gk1 

write.csv(gk1, file = file.path(output_dir, "7_treatment_effects.csv"), row.names = TRUE)



### Step8: political_ideology / political_interest / knowledge / news_consumption ###

# 8.1 political_ideology

# 8.1 high.ideology & low.ideology
data$ideology = as.numeric(data$ideology)
table(data$ideology)

data$high_ideology=ifelse(std(data$ideology)>=0,1,0)
table(data$high_ideology) # 0: 437, 1: 438

high.ideology <- data[data$high_ideology == 1,]
low.ideology <- data[data$high_ideology == 0,]


# 8.2 political_interest

# 8.2 high.interest & low.interest
data$interest = as.numeric(data$interest)
table(data$interest) # 0:14, 0.25:125, 0.5:261, 0.75:382, 1:93

data$high_interest=ifelse(std(data$interest)>=0,1,0)
table(data$high_interest) # 0:400, 1:475

high.interest <- data[data$high_interest == 1,]
low.interest <- data[data$high_interest == 0,]


# 8.3 knowledge

# 8.3 high.knowledge & low.knowledge
data$knowledge = as.numeric(data$knowledge)
table(data$knowledge) # 0:139, 0.5:349, 1:387

data$high_knowledge=ifelse(std(data$knowledge)>=0,1,0)
table(data$high_knowledge) # 0:488, 1:387

high.knowledge <- data[data$high_knowledge == 1,]
low.knowledge <- data[data$high_knowledge == 0,]


# 8.4 news_consumption

# 8.4 high.news_consumption & low.news_consumption
data$news_consumption = as.numeric(data$news_consumption)
table(data$news_consumption) # 0:167, 0.25:96,  0.5:267, 0.75:185, 1:160

data$high_news_consumption=ifelse(std(data$news_consumption)>=0,1,0)
table(data$high_news_consumption) # 0:530, 1:345

high.news_consumption <- data[data$high_news_consumption == 1,]
low.news_consumption <- data[data$high_news_consumption == 0,]


# 8.5 Data for combining
data_for_combining <- data %>%
  dplyr::select(age, education, ethnicity, gender, urban, income, major,
                marriage, occupation, party,
                ideology, interest, news_consumption, knowledge,
                treatment, class, effort, trivialization,
                central_trust, central_satisfaction, local_trust, local_satisfaction,
                evaluation_now, evaluation_future, confidence_1, confidence_2,
                efficacy_1, efficacy_2, pride_1, pride_2, rand, tr,
                emotion_1, emotion_2, emotion_3, emotion_4, emotion_5, emotion_6, group,
                summary_check_RA1, summary_check_RA2)

write.csv(data_for_combining, file = file.path(output_dir, "8_data_for_combining.csv"), row.names = TRUE)



### Step9: PCA Analysis ###

# 9.1 create new dataframe
dt <- as.data.frame(matrix(nrow=dim(data)[1]*2,ncol=0)) 
dt$central <- append(data$central_trust, data$central_satisfaction)
dt$local <- append(data$local_trust, data$local_satisfaction)
dt$evaluation <- append(data$evaluation_now, data$evaluation_future)
dt$confidence <- append(data$confidence_1, data$confidence_2)
dt$efficacy <- append(data$efficacy_1, data$efficacy_2)
dt$pride <- append(data$pride_1, data$pride_2)
dt$rand <- append(data$rand, data$rand)
dt$tr <- append(data$tr, data$tr)
dt$treatment <- append(data$treatment, data$treatment)
dt$emotion_1 <- append(data$emotion_1, data$emotion_1)
dt$emotion_2 <- append(data$emotion_2, data$emotion_2)
dt$emotion_3 <- append(data$emotion_3, data$emotion_3)
dt$emotion_4 <- append(data$emotion_4, data$emotion_4)
dt$emotion_5 <- append(data$emotion_5, data$emotion_5)
dt$emotion_6 <- append(data$emotion_6, data$emotion_6)
dt$effort <- append(data$effort, data$effort)
dt$trivialization <- append(data$trivialization, data$trivialization)


# 9.2 data pca analysis
result_dt=principal(dt[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt
dt$pca_score=result_dt$score[,1]
dt$pca_score

ev <- eigen(cor(dt[,c(1,2,4,5)])) 
nS <- nScree(x=ev$values)
plotnScree(nS)


# Proportion of Variance Explained
sink(file.path(output_dir, "9.0.1 PCA_Proportion of Variance.txt"))
pca_proportion <- princomp(cor(dt[, c(1, 2, 4, 5)]))
summary(pca_proportion)
sink()


# Loadings
sink(file.path(output_dir, "9.0.2 PCA_Loadings.txt"))
pca_proportion <- princomp(cor(dt[,c(1,2,4,5)]))
summary(pca_proportion)[2]
sink()


# Principal Component Analysis on Pro-regime Attitudes
gk_9.estimate=list()
gk_9.estimate[[1]] = estimate(rand = dt$rand, tr = dt$tr, y = dt$pca_score)
list_.name=c("PCA Score")
names(gk_9.estimate)=list_.name
gk_9.estimate_pca_score = as.data.frame(gk_9.estimate$`PCA Score`)
gk_9.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_9.estimate_pca_score
write.csv(gk_9.estimate_pca_score, file = file.path(output_dir, "9_pca_on_pro_regime_attitudes.csv"), row.names = TRUE)



### Step10: Potential Mechanisms ###

# 10.1 rand
data$rand=0

rand1=which(data$group=="1_forced_propaganda")
data$rand[rand1]=1 # random

rand1=which(data$group=="2_forced_nonpropaganda")
data$rand[rand1]=1 # random

rand2=which(data$group=="3_select")
data$rand[rand2]=0 # select

table(data$rand) # 0: 466, 1: 409


# 10.2 tr
data$tr=0

tr1=which(data$treatment=="force_polit")
data$tr[tr1]=1 # treated

tr1=which(data$treatment=="select_polit")
data$tr[tr1]=1 # treated

tr2=which(data$treatment=="force_science")
data$tr[tr2]=0 # control

tr2=which(data$treatment=="select_science")
data$tr[tr2]=0 # control

table(data$tr) # 0: 277, 1: 598


# 10.3 gk.estimate
set.seed(12345)

data$effort = as.numeric(data$effort)
data$trivialization = as.numeric(data$trivialization)
data$`emotion_1` = as.numeric(data$`emotion_1`)
data$`emotion_2` = as.numeric(data$`emotion_2`)
data$`emotion_3` = as.numeric(data$`emotion_3`)
data$`emotion_4` = as.numeric(data$`emotion_4`)
data$`emotion_5` = as.numeric(data$`emotion_5`)
data$`emotion_6` = as.numeric(data$`emotion_6`)

gk.estimate=list()
gk.estimate[[1]]=estimate(rand = data$rand, tr = data$tr, y = data$effort)
gk.estimate[[2]]=estimate(rand = data$rand, tr = data$tr, y = data$trivialization)
gk.estimate[[3]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_1`)
gk.estimate[[4]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_2`)
gk.estimate[[5]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_3`)
gk.estimate[[6]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_4`)
gk.estimate[[7]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_5`)
gk.estimate[[8]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_6`)

list.name=c("Effort","Trivialization",
            "Anxious","Proud",
            "Angry","Hope",
            "Worry","Excited")

names(gk.estimate)=list.name # remember to give every estimate a name

gk.estimate_Effort = as.data.frame(gk.estimate$Effort)
gk.estimate_Trivialization = as.data.frame(gk.estimate$Trivialization)
gk.estimate_Anxious = as.data.frame(gk.estimate$Anxious)
gk.estimate_Proud = as.data.frame(gk.estimate$Proud)
gk.estimate_Angry = as.data.frame(gk.estimate$Angry)
gk.estimate_Hope = as.data.frame(gk.estimate$Hope)
gk.estimate_Worry = as.data.frame(gk.estimate$Worry)
gk.estimate_Excited = as.data.frame(gk.estimate$Excited)

gk.estimate_Effort['name'] = rep("Effort", times=4)
gk.estimate_Trivialization['name'] = rep("Trivialization", times=4)
gk.estimate_Anxious['name'] = rep("Anxious", times=4)
gk.estimate_Proud['name'] = rep("Proud", times=4)
gk.estimate_Angry['name'] = rep("Angry", times=4)
gk.estimate_Hope['name'] = rep("Hope", times=4)
gk.estimate_Worry['name'] = rep("Worry", times=4)
gk.estimate_Excited['name'] = rep("Excited", times=4)

gk.estimate.rap=rbind.data.frame(gk.estimate_Effort, gk.estimate_Trivialization,
                                 gk.estimate_Anxious, gk.estimate_Proud,
                                 gk.estimate_Angry, gk.estimate_Hope,
                                 gk.estimate_Worry, gk.estimate_Excited)

gk.estimate.rap$Estimate = round(gk.estimate.rap$Estimate, 3)
gk.estimate.rap$SE = round(gk.estimate.rap$SE, 3)
gk.estimate.rap$t = round(gk.estimate.rap$t, 3)
gk.estimate.rap$p = round(gk.estimate.rap$p, 3)

# 10.3 gk.estimate.rap
write.csv(gk.estimate.rap, file = file.path(output_dir, "10_potential_mechanisms.csv"), row.names = TRUE)



### 11 High/Low -> ATE/selector/non-selector (Y is PCA) ###

# 11.1 political ideology

# 11.1.1 high.ideology
dt_high.ideology <- as.data.frame(matrix(nrow=nrow(high.ideology)*2,ncol=0)) 
dt_high.ideology$central <- append(high.ideology$central_trust, high.ideology$central_satisfaction)
dt_high.ideology$local <- append(high.ideology$local_trust, high.ideology$local_satisfaction)
dt_high.ideology$evaluation <- append(high.ideology$evaluation_now, high.ideology$evaluation_future)
dt_high.ideology$confidence <- append(high.ideology$confidence_1, high.ideology$confidence_2)
dt_high.ideology$efficacy <- append(high.ideology$efficacy_1, high.ideology$efficacy_2)
dt_high.ideology$pride <- append(high.ideology$pride_1, high.ideology$pride_2)
dt_high.ideology$rand <- append(high.ideology$rand, high.ideology$rand)
dt_high.ideology$tr <- append(high.ideology$tr, high.ideology$tr)

result_dt_high.ideology=principal(dt_high.ideology[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.ideology
dt_high.ideology$pca_score=result_dt_high.ideology$score[,1]
dt_high.ideology$pca_score

gk_1.1.1.estimate=list()
gk_1.1.1.estimate[[1]] = estimate(rand = dt_high.ideology$rand, tr = dt_high.ideology$tr, y = dt_high.ideology$pca_score)

list_.name=c("PCA Score")
names(gk_1.1.1.estimate)=list_.name
gk_11.1.1.estimate_pca_score = as.data.frame(gk_1.1.1.estimate$`PCA Score`)
gk_11.1.1.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_11.1.1.estimate_pca_score$Estimate = round(gk_11.1.1.estimate_pca_score$Estimate, 3)
gk_11.1.1.estimate_pca_score$SE = round(gk_11.1.1.estimate_pca_score$SE, 3)
gk_11.1.1.estimate_pca_score$t = round(gk_11.1.1.estimate_pca_score$t, 3)
gk_11.1.1.estimate_pca_score$p = round(gk_11.1.1.estimate_pca_score$p, 3)
write.csv(gk_11.1.1.estimate_pca_score, file = file.path(output_dir, "11_1_pca_on_pro_regime_attitudes_high_ideology.csv"), row.names = TRUE)



# 11.1.2 low.ideology
dt_low.ideology <- as.data.frame(matrix(nrow=nrow(low.ideology)*2,ncol=0)) 
dt_low.ideology$central <- append(low.ideology$central_trust, low.ideology$central_satisfaction)
dt_low.ideology$local <- append(low.ideology$local_trust, low.ideology$local_satisfaction)
dt_low.ideology$evaluation <- append(low.ideology$evaluation_now, low.ideology$evaluation_future)
dt_low.ideology$confidence <- append(low.ideology$confidence_1, low.ideology$confidence_2)
dt_low.ideology$efficacy <- append(low.ideology$efficacy_1, low.ideology$efficacy_2)
dt_low.ideology$pride <- append(low.ideology$pride_1, low.ideology$pride_2)
dt_low.ideology$rand <- append(low.ideology$rand, low.ideology$rand)
dt_low.ideology$tr <- append(low.ideology$tr, low.ideology$tr)

result_dt_low.ideology=principal(dt_low.ideology[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.ideology
dt_low.ideology$pca_score=result_dt_low.ideology$score[,1]
dt_low.ideology$pca_score

gk_11.1.2.estimate=list()
gk_11.1.2.estimate[[1]] = estimate(rand = dt_low.ideology$rand, tr = dt_low.ideology$tr, y = dt_low.ideology$pca_score)

list_.name=c("PCA Score")
names(gk_11.1.2.estimate)=list_.name
gk_11.1.2.estimate_pca_score = as.data.frame(gk_11.1.2.estimate$`PCA Score`)
gk_11.1.2.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_11.1.2.estimate_pca_score$Estimate = round(gk_11.1.2.estimate_pca_score$Estimate, 3)
gk_11.1.2.estimate_pca_score$SE = round(gk_11.1.2.estimate_pca_score$SE, 3)
gk_11.1.2.estimate_pca_score$t = round(gk_11.1.2.estimate_pca_score$t, 3)
gk_11.1.2.estimate_pca_score$p = round(gk_11.1.2.estimate_pca_score$p, 3)
write.csv(gk_11.1.2.estimate_pca_score, file = file.path(output_dir, "11_1_pca_on_pro_regime_attitudes_low_ideology.csv"), row.names = TRUE)



# 11.2 political interest

# 11.2.1 high.interest
dt_high.interest <- as.data.frame(matrix(nrow=nrow(high.interest)*2,ncol=0))
dt_high.interest$central <- append(high.interest$central_trust, high.interest$central_satisfaction)
dt_high.interest$local <- append(high.interest$local_trust, high.interest$local_satisfaction)
dt_high.interest$evaluation <- append(high.interest$evaluation_now, high.interest$evaluation_future)
dt_high.interest$confidence <- append(high.interest$confidence_1, high.interest$confidence_2)
dt_high.interest$efficacy <- append(high.interest$efficacy_1, high.interest$efficacy_2)
dt_high.interest$pride <- append(high.interest$pride_1, high.interest$pride_2)
dt_high.interest$rand <- append(high.interest$rand, high.interest$rand)
dt_high.interest$tr <- append(high.interest$tr, high.interest$tr)

result_dt_high.interest=principal(dt_high.interest[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.interest
dt_high.interest$pca_score=result_dt_high.interest$score[,1]
dt_high.interest$pca_score

gk_11.2.1.estimate=list()
gk_11.2.1.estimate[[1]] = estimate(rand = dt_high.interest$rand, tr = dt_high.interest$tr, y = dt_high.interest$pca_score)

list_.name=c("PCA Score")
names(gk_11.2.1.estimate)=list_.name
gk_11.2.1.estimate_pca_score = as.data.frame(gk_11.2.1.estimate$`PCA Score`)
gk_11.2.1.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.2.1.estimate_pca_score$Estimate = round(gk_11.2.1.estimate_pca_score$Estimate, 3)
gk_11.2.1.estimate_pca_score$SE = round(gk_11.2.1.estimate_pca_score$SE, 3)
gk_11.2.1.estimate_pca_score$t = round(gk_11.2.1.estimate_pca_score$t, 3)
gk_11.2.1.estimate_pca_score$p = round(gk_11.2.1.estimate_pca_score$p, 3)

write.csv(gk_11.2.1.estimate_pca_score, file = file.path(output_dir, "11_2_pca_on_pro_regime_attitudes_high_interest.csv"), row.names = TRUE)



# 40.2.2 low.interest
dt_low.interest <- as.data.frame(matrix(nrow=nrow(low.interest)*2,ncol=0))
dt_low.interest$central <- append(low.interest$central_trust, low.interest$central_satisfaction)
dt_low.interest$local <- append(low.interest$local_trust, low.interest$local_satisfaction)
dt_low.interest$evaluation <- append(low.interest$evaluation_now, low.interest$evaluation_future)
dt_low.interest$confidence <- append(low.interest$confidence_1, low.interest$confidence_2)
dt_low.interest$efficacy <- append(low.interest$efficacy_1, low.interest$efficacy_2)
dt_low.interest$pride <- append(low.interest$pride_1, low.interest$pride_2)
dt_low.interest$rand <- append(low.interest$rand, low.interest$rand)
dt_low.interest$tr <- append(low.interest$tr, low.interest$tr)

result_dt_low.interest=principal(dt_low.interest[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.interest
dt_low.interest$pca_score=result_dt_low.interest$score[,1]
dt_low.interest$pca_score

gk_11.2.2.estimate=list()
gk_11.2.2.estimate[[1]] = estimate(rand = dt_low.interest$rand, tr = dt_low.interest$tr, y = dt_low.interest$pca_score)

list_.name=c("PCA Score")

names(gk_11.2.2.estimate)=list_.name

gk_11.2.2.estimate_pca_score = as.data.frame(gk_11.2.2.estimate$`PCA Score`)

gk_11.2.2.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.2.2.estimate_pca_score$Estimate = round(gk_11.2.2.estimate_pca_score$Estimate, 3)
gk_11.2.2.estimate_pca_score$SE = round(gk_11.2.2.estimate_pca_score$SE, 3)
gk_11.2.2.estimate_pca_score$t = round(gk_11.2.2.estimate_pca_score$t, 3)
gk_11.2.2.estimate_pca_score$p = round(gk_11.2.2.estimate_pca_score$p, 3)

write.csv(gk_11.2.2.estimate_pca_score, file = file.path(output_dir, "11_2_pca_on_pro_regime_attitudes_low_interest.csv"), row.names = TRUE)



# 11.3 political news consumption

# 11.3.1 high.news_consumption
dt_high.news_consumption <- as.data.frame(matrix(nrow=nrow(high.news_consumption)*2,ncol=0)) 
dt_high.news_consumption$central <- append(high.news_consumption$central_trust, high.news_consumption$central_satisfaction)
dt_high.news_consumption$local <- append(high.news_consumption$local_trust, high.news_consumption$local_satisfaction)
dt_high.news_consumption$evaluation <- append(high.news_consumption$evaluation_now, high.news_consumption$evaluation_future)
dt_high.news_consumption$confidence <- append(high.news_consumption$confidence_1, high.news_consumption$confidence_2)
dt_high.news_consumption$efficacy <- append(high.news_consumption$efficacy_1, high.news_consumption$efficacy_2)
dt_high.news_consumption$pride <- append(high.news_consumption$pride_1, high.news_consumption$pride_2)
dt_high.news_consumption$rand <- append(high.news_consumption$rand, high.news_consumption$rand)
dt_high.news_consumption$tr <- append(high.news_consumption$tr, high.news_consumption$tr)

result_dt_high.news_consumption=principal(dt_high.news_consumption[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.news_consumption
dt_high.news_consumption$pca_score=result_dt_high.news_consumption$score[,1]
dt_high.news_consumption$pca_score

gk_11.3.1.estimate=list()
gk_11.3.1.estimate[[1]] = estimate(rand = dt_high.news_consumption$rand, tr = dt_high.news_consumption$tr, y = dt_high.news_consumption$pca_score)

list_.name=c("PCA Score")
names(gk_11.3.1.estimate)=list_.name
gk_11.3.1.estimate_pca_score = as.data.frame(gk_11.3.1.estimate$`PCA Score`)
gk_11.3.1.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.3.1.estimate_pca_score$Estimate = round(gk_11.3.1.estimate_pca_score$Estimate, 3)
gk_11.3.1.estimate_pca_score$SE = round(gk_11.3.1.estimate_pca_score$SE, 3)
gk_11.3.1.estimate_pca_score$t = round(gk_11.3.1.estimate_pca_score$t, 3)
gk_11.3.1.estimate_pca_score$p = round(gk_11.3.1.estimate_pca_score$p, 3)
write.csv(gk_11.3.1.estimate_pca_score, file = file.path(output_dir, "11_3_pca_on_pro_regime_attitudes_high_news_consumption.csv"), row.names = TRUE)



# 11.3.2 low.news_consumption
dt_low.news_consumption <- as.data.frame(matrix(nrow=nrow(low.news_consumption)*2,ncol=0)) 
dt_low.news_consumption$central <- append(low.news_consumption$central_trust, low.news_consumption$central_satisfaction)
dt_low.news_consumption$local <- append(low.news_consumption$local_trust, low.news_consumption$local_satisfaction)
dt_low.news_consumption$evaluation <- append(low.news_consumption$evaluation_now, low.news_consumption$evaluation_future)
dt_low.news_consumption$confidence <- append(low.news_consumption$confidence_1, low.news_consumption$confidence_2)
dt_low.news_consumption$efficacy <- append(low.news_consumption$efficacy_1, low.news_consumption$efficacy_2)
dt_low.news_consumption$pride <- append(low.news_consumption$pride_1, low.news_consumption$pride_2)
dt_low.news_consumption$rand <- append(low.news_consumption$rand, low.news_consumption$rand)
dt_low.news_consumption$tr <- append(low.news_consumption$tr, low.news_consumption$tr)

result_dt_low.news_consumption=principal(dt_low.news_consumption[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.news_consumption
dt_low.news_consumption$pca_score=result_dt_low.news_consumption$score[,1]
dt_low.news_consumption$pca_score


gk_11.3.2.estimate=list()
gk_11.3.2.estimate[[1]] = estimate(rand = dt_low.news_consumption$rand, tr = dt_low.news_consumption$tr, y = dt_low.news_consumption$pca_score)

list_.name=c("PCA Score")

names(gk_11.3.2.estimate)=list_.name

gk_11.3.2.estimate_pca_score = as.data.frame(gk_11.3.2.estimate$`PCA Score`)

gk_11.3.2.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.3.2.estimate_pca_score$Estimate = round(gk_11.3.2.estimate_pca_score$Estimate, 3)
gk_11.3.2.estimate_pca_score$SE = round(gk_11.3.2.estimate_pca_score$SE, 3)
gk_11.3.2.estimate_pca_score$t = round(gk_11.3.2.estimate_pca_score$t, 3)
gk_11.3.2.estimate_pca_score$p = round(gk_11.3.2.estimate_pca_score$p, 3)
write.csv(gk_11.3.2.estimate_pca_score, file = file.path(output_dir, "11_3_pca_on_pro_regime_attitudes_low_news_consumption.csv"), row.names = TRUE)


